# Scrape a chapter list from xs.sogou.com and save each chapter's text to a local file.
import os

import requests
from lxml import etree

# Address of the listing page whose chapter links are scraped.
html_url = 'https://xs.sogou.com/list/8221506120'
# A timeout keeps the script from hanging forever on a stalled connection.
novel = requests.get(html_url, timeout=30).text
html = etree.HTML(novel)

# Chapter titles: text of the <span> inside each chapter link.
name = html.xpath('//div[@class="chapter-box"]/ul/li/a/span/text()')

# Chapter links: href attribute of each chapter link.
url = html.xpath('//div[@class="chapter-box"]/ul/li/a/@href')
print(url)

# Create the output directory up front; open() below would otherwise fail
# with FileNotFoundError when it does not exist yet.
os.makedirs('./新闻1', exist_ok=True)

# Walk titles and links in lockstep, one chapter per iteration.
for names, urls in zip(name, url):
    # Build the chapter URL from the scraped href.
    urls = 'https://xiaoshuo.sogou.com/' + urls

    # Fetch and parse the chapter page.
    novel_html = requests.get(urls, timeout=30).text
    novel_html = etree.HTML(novel_html)

    # Chapter body: text of every <p> directly under the content container.
    novel_content = novel_html.xpath('//div[@id="contentWp"]/p/text()')
    if not novel_content:
        # Nothing to write; do not create an empty file.
        continue

    # Open the file once per chapter (not once per paragraph) and pin the
    # encoding so the text is written as UTF-8 regardless of the OS locale.
    # Append mode is kept so chapters sharing a title end up in the same file.
    with open(f'./新闻1/{names}.txt', 'a', encoding='utf-8') as f:
        f.writelines(novel_content)